{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "import collections\n",
    "import os\n",
    "import pickle\n",
    "import random\n",
    "import re\n",
    "import time\n",
    "\n",
    "import numpy as np\n",
    "import tensorflow as tf\n",
    "from sklearn.model_selection import train_test_split\n",
    "from sklearn.preprocessing import LabelEncoder"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# Notebook-wide settings; the cells below read these module-level names.\n",
    "\n",
    "# Working directory captured at the moment this cell runs.\n",
    "location = os.getcwd()\n",
    "\n",
    "# Input shaping: at most `maxlen` tokens are kept per text, `batch` texts per step.\n",
    "maxlen, batch = 20, 100\n",
    "\n",
    "# Network size: number of stacked LSTM layers and the filters/units per layer.\n",
    "num_layers, size_layer = 3, 256\n",
    "\n",
    "# Step size handed to the Adam optimizer.\n",
    "learning_rate = 0.0001"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "def _read_pickle(path):\n",
    "    \"\"\"Unpickle and return the object stored at `path`.\n",
    "\n",
    "    NOTE(review): unpickling can execute arbitrary code, so only load files you trust.\n",
    "    \"\"\"\n",
    "    with open(path, 'rb') as handle:\n",
    "        return pickle.load(handle)\n",
    "\n",
    "\n",
    "# Labelled examples; column 0 and column 1 are split off as inputs and targets later on.\n",
    "df = _read_pickle('dataset-emotion.p')\n",
    "# Embedding table, indexed by row through the ids stored in `dictionary`.\n",
    "vectors = _read_pickle('vector-emotion.p')\n",
    "# Lookup from token to its row in `vectors`.\n",
    "dictionary = _read_pickle('dataset-dictionary.p')\n",
    "\n",
    "# Distinct values found in column 1; its length fixes the number of output classes.\n",
    "label = np.unique(df[:, 1])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Hold out 20% of the rows for validation.\n",
    "# `train_test_split` comes from `sklearn.model_selection` (imported in the first cell);\n",
    "# the old `sklearn.cross_validation` module was removed in scikit-learn 0.20.\n",
    "\n",
    "# A fixed seed keeps the split identical across Restart & Run All.\n",
    "SPLIT_SEED = 42\n",
    "\n",
    "train_X, test_X, train_Y, test_Y = train_test_split(\n",
    "    df[:, 0], df[:, 1], test_size=0.2, random_state=SPLIT_SEED\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "class Model:\n",
    "    \"\"\"Convolution + stacked-LSTM classifier built with the TF1 graph API.\n",
    "\n",
    "    Parallel convolutions over the input are max-pooled along the time axis,\n",
    "    concatenated on the channel axis and fed to a multi-layer LSTM. The output\n",
    "    of the last time step is projected to class logits.\n",
    "\n",
    "    Attributes:\n",
    "        X: float32 placeholder, shape [batch, sequence_length, dimension_input, 1].\n",
    "        Y: float32 placeholder, shape [batch, dimension_output]; its argmax is the class.\n",
    "        logits: unnormalised class scores, shape [batch, dimension_output].\n",
    "        cost: mean softmax cross-entropy plus an L2 penalty on all trainable variables.\n",
    "        optimizer: Adam training op minimising `cost`.\n",
    "        correct_pred: per-example boolean, predicted class equals target class.\n",
    "        accuracy: mean of `correct_pred`.\n",
    "    \"\"\"\n",
    "\n",
    "    def __init__(self, sequence_length, dimension_input, dimension_output, \n",
    "                 learning_rate, filter_sizes, pooling_size, out_dimension, num_layer):\n",
    "        \"\"\"Build the whole graph in the current default graph.\n",
    "\n",
    "        Args:\n",
    "            sequence_length: number of time steps per example.\n",
    "            dimension_input: width of the vector at each time step.\n",
    "            dimension_output: number of classes.\n",
    "            learning_rate: step size for Adam.\n",
    "            filter_sizes: iterable of convolution heights (time steps per filter).\n",
    "                The branches are concatenated on the channel axis, so every size\n",
    "                must produce the same pooled length.\n",
    "            pooling_size: max-pool window and stride along the time axis.\n",
    "            out_dimension: filters per convolution and units per LSTM layer.\n",
    "            num_layer: number of stacked LSTM layers.\n",
    "        \"\"\"\n",
    "        self.X = tf.placeholder(tf.float32, shape=[None, sequence_length, dimension_input, 1])\n",
    "        self.Y = tf.placeholder(tf.float32, shape=[None, dimension_output])\n",
    "\n",
    "        pooled_outputs = []\n",
    "        for i in filter_sizes:\n",
    "            w = tf.Variable(tf.truncated_normal([i, dimension_input, 1, out_dimension], stddev=0.1))\n",
    "            b = tf.Variable(tf.truncated_normal([out_dimension], stddev = 0.01))\n",
    "            conv = tf.nn.relu(tf.nn.conv2d(self.X, w, strides=[1, 1, 1, 1],padding='VALID') + b)\n",
    "            pooled = tf.nn.max_pool(conv,ksize=[1, pooling_size, 1, 1],strides=[1, pooling_size, 1, 1],padding='VALID')\n",
    "            # The filter spans the full input width under VALID padding, so axis 2 has size 1.\n",
    "            # Squeezing it keeps whatever time length the pooling produced; the former\n",
    "            # reshape to a hand-computed length (with a -1 batch dimension) could silently\n",
    "            # mix examples whenever that guess was off.\n",
    "            pooled = tf.squeeze(pooled, axis=[2])\n",
    "            pooled_outputs.append(pooled)\n",
    "        h_pool = tf.concat(pooled_outputs, 2)\n",
    "\n",
    "        def lstm_cell():\n",
    "            return tf.nn.rnn_cell.LSTMCell(out_dimension)\n",
    "\n",
    "        # Depth comes from the constructor argument; this used to read the notebook\n",
    "        # global `num_layers`, which ignored the value passed in.\n",
    "        self.rnn_cells = tf.nn.rnn_cell.MultiRNNCell([lstm_cell() for _ in range(num_layer)])\n",
    "        # NOTE(review): the keep probability is a constant 0.5, so dropout is also\n",
    "        # applied when the graph is run for evaluation.\n",
    "        drop = tf.contrib.rnn.DropoutWrapper(self.rnn_cells, output_keep_prob = 0.5)\n",
    "        self.outputs, self.last_state = tf.nn.dynamic_rnn(drop, h_pool, dtype = tf.float32)\n",
    "\n",
    "        # Classify from the output of the final time step.\n",
    "        self.rnn_W = tf.Variable(tf.random_normal((out_dimension, dimension_output)))\n",
    "        self.rnn_B = tf.Variable(tf.random_normal([dimension_output]))\n",
    "        self.logits = tf.matmul(self.outputs[:, -1], self.rnn_W) + self.rnn_B\n",
    "\n",
    "        self.cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits = self.logits, labels = self.Y))\n",
    "        # L2 penalty over every trainable variable created so far (biases included).\n",
    "        l2 = sum(0.0005 * tf.nn.l2_loss(tf_var) for tf_var in tf.trainable_variables())\n",
    "        self.cost += l2\n",
    "        self.optimizer = tf.train.AdamOptimizer(learning_rate = learning_rate).minimize(self.cost)\n",
    "\n",
    "        self.correct_pred = tf.equal(tf.argmax(self.logits, 1), tf.argmax(self.Y, 1))\n",
    "        self.accuracy = tf.reduce_mean(tf.cast(self.correct_pred, tf.float32))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "'unwarrentedly'\n",
      "epoch: 0 , pass acc: 0 , current acc: 0.638763485371\n",
      "epoch: 1 , training loss: 2.28037899679 , training acc: 0.537168547851 , valid loss: 1.75857370441 , valid acc: 0.638763485371\n",
      "'unwarrentedly'\n",
      "epoch: 1 , pass acc: 0.638763485371 , current acc: 0.66034811728\n",
      "epoch: 2 , training loss: 1.58979661385 , training acc: 0.657819414244 , valid loss: 1.45900135596 , valid acc: 0.66034811728\n",
      "'unwarrentedly'\n",
      "epoch: 2 , pass acc: 0.66034811728 , current acc: 0.663337312039\n",
      "epoch: 3 , training loss: 1.35582725925 , training acc: 0.674838010269 , valid loss: 1.298817383 , valid acc: 0.663337312039\n",
      "'unwarrentedly'\n",
      "epoch: 4 , training loss: 1.21278129236 , training acc: 0.685833811635 , valid loss: 1.21625504681 , valid acc: 0.658199257854\n",
      "'unwarrentedly'\n",
      "epoch: 5 , training loss: 1.11851639671 , training acc: 0.695278922079 , valid loss: 1.15399059693 , valid acc: 0.660276088179\n",
      "'unwarrentedly'\n",
      "epoch: 6 , training loss: 1.05010887392 , training acc: 0.706856607443 , valid loss: 1.1369298899 , valid acc: 0.662196857732\n",
      "'unwarrentedly'\n",
      "epoch: 7 , training loss: 0.996909444748 , training acc: 0.717723434995 , valid loss: 1.17333474923 , valid acc: 0.657118827498\n",
      "'unwarrentedly'\n",
      "epoch: 8 , training loss: 0.95617748358 , training acc: 0.728218337305 , valid loss: 1.16919824019 , valid acc: 0.6548739271\n",
      "'unwarrentedly'\n",
      "epoch: 9 , training loss: 0.933938052423 , training acc: 0.732384504372 , valid loss: 1.15860650577 , valid acc: 0.654549799189\n",
      "'unwarrentedly'\n",
      "epoch: 10 , training loss: 0.916154200221 , training acc: 0.737141552537 , valid loss: 1.25470847614 , valid acc: 0.634141635888\n",
      "'unwarrentedly'\n",
      "epoch: 11 , training loss: 0.89633845398 , training acc: 0.742852410318 , valid loss: 1.31553278515 , valid acc: 0.62818725459\n",
      "'unwarrentedly'\n",
      "epoch: 12 , training loss: 0.870958457021 , training acc: 0.751430695276 , valid loss: 1.35667084581 , valid acc: 0.625294097582\n",
      "'unwarrentedly'\n",
      "epoch: 13 , training loss: 0.867803699844 , training acc: 0.751742633378 , valid loss: 1.37888059403 , valid acc: 0.626062403385\n",
      "break epoch: 13\n"
     ]
    }
   ],
   "source": [
    "def build_batch(texts, labels, start):\n",
    "    \"\"\"Vectorise `batch` consecutive examples beginning at index `start`.\n",
    "\n",
    "    Each text is split on whitespace and cut to `maxlen` tokens. Tokens are\n",
    "    right-aligned, so shorter texts are zero-padded at the front. A token that\n",
    "    cannot be looked up keeps a zero vector in its slot.\n",
    "\n",
    "    Args:\n",
    "        texts: indexable collection of strings.\n",
    "        labels: indexable collection of class ids, aligned with `texts`.\n",
    "        start: index of the first example in the batch.\n",
    "\n",
    "    Returns:\n",
    "        (batch_x, batch_y): arrays of shape (batch, maxlen, dimension, 1) and\n",
    "        (batch, len(label)); each row of batch_y is one-hot.\n",
    "    \"\"\"\n",
    "    batch_x = np.zeros((batch, maxlen, dimension))\n",
    "    batch_y = np.zeros((batch, len(label)))\n",
    "    for k in range(batch):\n",
    "        tokens = texts[start + k].split()[:maxlen]\n",
    "        for no, text in enumerate(tokens[::-1]):\n",
    "            try:\n",
    "                batch_x[k, -1 - no, :] += vectors[dictionary[text], :]\n",
    "            except (KeyError, IndexError):\n",
    "                # Out-of-vocabulary token: skip it quietly instead of printing once\n",
    "                # per occurrence, and let any other kind of error surface.\n",
    "                continue\n",
    "        batch_y[k, int(labels[start + k])] = 1.0\n",
    "    # Trailing channel axis expected by the model's input placeholder.\n",
    "    return np.expand_dims(batch_x, axis=-1), batch_y\n",
    "\n",
    "\n",
    "tf.reset_default_graph()\n",
    "sess = tf.InteractiveSession()\n",
    "dimension = vectors.shape[1]\n",
    "model = Model(maxlen, dimension, len(label), learning_rate, [3, 3, 3], 5, size_layer, num_layers)\n",
    "sess.run(tf.global_variables_initializer())\n",
    "saver = tf.train.Saver(tf.global_variables())\n",
    "\n",
    "# Only full batches are used; leftover rows at the end of each split are dropped.\n",
    "train_batches = train_X.shape[0] // batch\n",
    "test_batches = test_X.shape[0] // batch\n",
    "if train_batches == 0 or test_batches == 0:\n",
    "    raise ValueError('train and test splits must each contain at least `batch` examples')\n",
    "\n",
    "# Stop once validation accuracy has failed to improve EARLY_STOPPING epochs in a row.\n",
    "EARLY_STOPPING, CURRENT_CHECKPOINT, CURRENT_ACC, EPOCH = 10, 0, 0, 0\n",
    "while CURRENT_CHECKPOINT < EARLY_STOPPING:\n",
    "    train_acc, train_loss, test_acc, test_loss = 0, 0, 0, 0\n",
    "\n",
    "    for i in range(0, train_batches * batch, batch):\n",
    "        batch_x, batch_y = build_batch(train_X, train_Y, i)\n",
    "        # Fetch loss and accuracy together with the update step; a separate run for\n",
    "        # accuracy would cost a second forward pass over the same batch.\n",
    "        loss, acc, _ = sess.run([model.cost, model.accuracy, model.optimizer],\n",
    "                                feed_dict = {model.X : batch_x, model.Y : batch_y})\n",
    "        train_loss += loss\n",
    "        train_acc += acc\n",
    "\n",
    "    for i in range(0, test_batches * batch, batch):\n",
    "        batch_x, batch_y = build_batch(test_X, test_Y, i)\n",
    "        loss, acc = sess.run([model.cost, model.accuracy], feed_dict = {model.X : batch_x, model.Y : batch_y})\n",
    "        test_loss += loss\n",
    "        test_acc += acc\n",
    "\n",
    "    train_loss /= train_batches\n",
    "    train_acc /= train_batches\n",
    "    test_loss /= test_batches\n",
    "    test_acc /= test_batches\n",
    "\n",
    "    if test_acc > CURRENT_ACC:\n",
    "        print('epoch:', EPOCH, ', pass acc:', CURRENT_ACC, ', current acc:', test_acc)\n",
    "        CURRENT_ACC = test_acc\n",
    "        CURRENT_CHECKPOINT = 0\n",
    "        # Checkpoint only when validation accuracy reaches a new best.\n",
    "        saver.save(sess, os.path.join(os.getcwd(), 'model-cnn-vector.ckpt'))\n",
    "    else:\n",
    "        CURRENT_CHECKPOINT += 1\n",
    "    EPOCH += 1\n",
    "    print('epoch:', EPOCH, ', training loss:', train_loss, ', training acc:', train_acc, ', valid loss:', test_loss, ', valid acc:', test_acc)\n",
    "\n",
    "print('break epoch:', EPOCH)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.5.2"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
